* Working directory: every relative path in this do-file resolves against it.
* NOTE(review): machine-specific absolute path -- adjust before running elsewhere.
cd "/Users/mpang/Documents/RESEARCH/PartyInstitution/Replication file"

****************************************************************************************
****** Replicating results in Table 1: Comparing different measures of corruption ******
****************************************************************************************
* Load the merged corruption measures. -clear- is the documented option for
* discarding whatever is currently in memory.
use "corruptionmerge.dta", clear

* Rescale each measure to the unit interval: (x - min) / (max - min).
* -summarize- leaves the sample minimum and maximum in r(min) and r(max).
summarize corruption
generate VDem = (corruption - r(min)) / (r(max) - r(min))

summarize CPI
generate CPI2 = (CPI - r(min)) / (r(max) - r(min))

* cce is sign-flipped before rescaling (presumably so that it points in the
* same direction as the other measures -- TODO confirm).
* The rescaling has to use the flipped series cce2, because r(min) and r(max)
* at that point come from -summarize cce2-. Rescaling the unflipped cce with
* cce2's range does not map the variable onto [0, 1].
summarize cce
generate cce2 = 0 - cce
summarize cce2
generate CCE2 = (cce2 - r(min)) / (r(max) - r(min))

* Total and within-case variation of each rescaled corruption measure.
* After -xtsum-, r(sd_b) and r(sd_w) hold the between- and within-panel
* standard deviations. TotalV is the sum of the two squared SDs and RatioWith
* is the within share of that total.
xtset gwf_caseid year

* TotalV and RatioWith are constant-valued variables that are overwritten for
* every measure, so after the loop they hold the CCE2 figures (this is what
* ends up in the saved file). Because the earlier values are lost, the figures
* for each measure are also printed to the log as they are computed.
generate TotalV = .
generate RatioWith = .
foreach measure in VDem CPI2 CCE2 {
	xtsum `measure'
	replace TotalV = r(sd_b)^2 + r(sd_w)^2
	replace RatioWith = r(sd_w)^2/TotalV
	* r() from -xtsum- is still intact here: -replace- does not touch it.
	local total = r(sd_b)^2 + r(sd_w)^2
	local share = r(sd_w)^2/`total'
	display as text "`measure': total variance = " as result `total' as text ", within share = " as result `share'
}


save "corruption_rescaled.dta", replace


****************************************************************************************
* Merging datasets *
****************************************************************************************

* Convert the party-institution spreadsheet to Stata format, sorted on the
* merge keys. -clear- lets the import proceed even when the data in memory
* have unsaved changes.
import excel "/Users/mpang/Documents/RESEARCH/PartyInstitution/Replication file/PartyInsFull.xlsx", sheet("Sheet1") firstrow clear
sort gwf_country year
save "/Users/mpang/Documents/RESEARCH/PartyInstitution/Replication file/PartyInsFull.dta",replace


* Master data for the merge.
* NOTE(review): temp.dta is not created anywhere in the visible part of this
* do-file; it must already exist in the working directory and carry a _merge
* variable left over from an earlier merge.
use "temp.dta",clear
drop _merge
sort gwf_country year
* Old-style merge syntax (match variables listed directly; both files sorted
* on them above).
* NOTE(review): consider -merge 1:1- or -merge m:1- once the uniqueness of
* gwf_country year in each file has been confirmed.
merge gwf_country year using "PartyInsFull.dta"


tab _merge
drop if gwf_leaderid ==.

* _merge == 1 marks observations that appear in the master data only.
drop if _merge == 1

* -replace- so that the do-file can be run again after the file exists.
save "Fulldata.dta", replace


*use "Fulldata.dta", replace

* The -keep- below is disabled: its first line starts with * and every line
* ends in ///, which joins the following line onto the same comment.
** Keeping only variables used in the paper
*keep cowcode year gwf_country gwf_caseid gwf_casename gwf_leaderid cg_regime lagcorruption leadermil ld time lagregionprotest election legcomp gr ///
 loggdp logoil lt lpop wdipopurb ythbul4 G_age nmc_logmilper paramil_pers repress inherit supportparty ///
 v2x_ex_party v2xnp_client v2xps_party coldwar mean5 ///
 partyrbrstmp militrank ldrrotation milconsult milmerit_mil milmerit_pers milnotrial plebiscit heirclan officepers ///
 paramil_pers paramil_party paramil_fightrebel supportparty partyleader partymins excomcivn multiethnic monoethnic heirparty ///
 heirfamily legcompetn leaderrelatvs leaderciv leadermil leaderrebel heirciv cabciv cabmil partymilit seizure_coup seizure_rebel ///
 seizure_uprising seizure_election seizure_succession seizure_family militparty_allyparty militparty_noparty militparty_priorparty ///
 nomilitary milethnic_inclusive milethnic_hetero milethnic_homo sectyapp_party sectyapp_pers electldr_notelect electldr_priordict ///
 electldr_priordem electldr_1candidate electldr_1faction electldr_multileg electldr_multiexec legnoms_indirect legnoms_veto ///
 legnoms_noveto legnoms_priordem ldr_exp_highrank ldr_exp_lowrank ldr_exp_rebel ldr_exp_demelect ldr_exp_supportparty ldr_exp_pers_loyal ///
 ldr_exp_pers_relative ldr_exp_rulingfamily ldr_exp_other ldr_group_priordem ldr_group_domparty ldr_group_military ldr_group_insurgency ///
 ldr_group_civsucc ldr_group_other ldr_group_foreign ldr_group_hereditary militparty_newparty partyhistory partylocal partyexcom_notpers ///
 v2palocoff v2paactcom v2pasoctie v2panom v2paind v2padisa

* -replace- for the same reason as above: a second run must not abort here.
save "FullData_replication.dta", replace


*****************************************************************************************
******** Replicating results for the measure of ruling party institutionalization  ******
*****************************************************************************************

* -clear- makes this section a safe restart point: without it -use- refuses to
* load when the data in memory have unsaved changes.
use "FullData_replication.dta", clear

** Choosing party-institutionalization related variables *
* xlist is the full candidate pool fed to the exploratory PCA below.
global xlist partyrbrstmp militrank ldrrotation milconsult milmerit_mil milmerit_pers milnotrial plebiscit heirclan officepers ///
 paramil_pers paramil_party paramil_fightrebel supportparty partyleader partymins excomcivn multiethnic monoethnic heirparty ///
 heirfamily legcompetn leaderrelatvs leaderciv leadermil leaderrebel heirciv cabciv cabmil partymilit seizure_coup seizure_rebel ///
 seizure_uprising seizure_election seizure_succession seizure_family militparty_allyparty militparty_noparty militparty_priorparty ///
 nomilitary milethnic_inclusive milethnic_hetero milethnic_homo sectyapp_party sectyapp_pers electldr_notelect electldr_priordict ///
 electldr_priordem electldr_1candidate electldr_1faction electldr_multileg electldr_multiexec legnoms_indirect legnoms_veto ///
 legnoms_noveto legnoms_priordem ldr_exp_highrank ldr_exp_lowrank ldr_exp_rebel ldr_exp_demelect ldr_exp_supportparty ldr_exp_pers_loyal ///
 ldr_exp_pers_relative ldr_exp_rulingfamily ldr_exp_other ldr_group_priordem ldr_group_domparty ldr_group_military ldr_group_insurgency ///
 ldr_group_civsucc ldr_group_other ldr_group_foreign ldr_group_hereditary militparty_newparty partyhistory partylocal partyexcom_notpers ///
 v2palocoff v2paactcom v2pasoctie v2panom v2paind v2padisa


* Principal components of the whole pool, retaining three components, followed
* by the component loadings.
pca $xlist, comp(3)
estat loadings
* variables in IRT model: partylocal partyhistory heirparty v2palocoff v2paactcom v2pasoctie



** Latent measure of party institutionalization when a support party exists *
* Replicating results in Appendix A Table A1 *
* bvars are the three items that enter the gsem below with the logit option;
* cvars are the three items that enter it with the reg option.
global bvars = "partylocal partyhistory heirparty"
global cvars = "v2palocoff v2paactcom v2pasoctie"
* PCA and Cronbach's alpha (item-level statistics, standardized items) on the
* six items, restricted to observations with supportparty==1.
pca $bvars $cvars if supportparty==1
alpha $bvars $cvars if supportparty==1,item std
* NOTE(review): -estat loadings- runs after -alpha-; presumably it still
* reports the loadings of the -pca- two lines up -- confirm.
estat loadings
* Descriptives for the six items: first where a support party exists, then
* for all observations.
sum $bvars $cvars if supportparty==1
sum $bvars $cvars 

* Get latent measure of party institutionalization when a support party exists *
* One latent variable, PER, points to all six items. var(PER@1) constrains its
* variance to 1 and the standard errors are clustered on gwf_caseid.
* NOTE(review): the if-condition and the var()/vce() options sit inside the
* parentheses of the second equation rather than after both equations --
* confirm that this is parsed as intended before relying on the sample.
gsem (PER->$bvars,logit) (PER-> $cvars if supportparty==1,reg var(PER@1) vce(cluster gwf_caseid)) 
* Empirical Bayes means of the latent variable, stored in pi for the
* estimation sample only.
predict pi if e(sample)==1,latent ebmeans

* Get discrimination and difficulties
* Builds a small item-level table from the gsem estimates still held in e():
*   disc = coefficient of the item on the latent variable PER  (_b[item:PER])
*   diff = -(item intercept) / disc                            (-_b[item:_cons] / disc)
* The working data are protected by preserve/restore.
preserve
drop _all
* The item list comes from the same globals that define the gsem equations,
* so the table cannot drift out of step with the model.
local items $bvars $cvars
local nitems : word count `items'
set obs `nitems'
gen str item = " "
gen diff = .
gen disc = .
local row = 1
foreach it of local items {
	replace diff = -_b[`it':_cons] / _b[`it':PER] in `row'
	replace disc = _b[`it':PER] in `row'
	replace item = "`it'" in `row'
	local ++row
}
* Most discriminating item first. This replaces the earlier reverse-rank
* helper (rank, times -1, plus 10), which relied on variable-name abbreviation
* and on an arbitrary offset.
gsort -disc
list in 1/`nitems', clean noobs
restore

* Treat no ruling party as the LOWEST possible score *
* Observations with supportparty==0 receive the smallest predicted score.
quietly summarize pi
replace pi = r(min) if supportparty==0

* Rescale party institutionalization, 0-1 *
* Plain min-max rescaling. It gives the same values as the earlier
* (pi + abs(min)) / (abs(min) + max) form whenever the minimum is negative,
* and it still maps onto [0, 1] when the minimum is not negative.
quietly summarize pi
replace pi = (pi - r(min)) / (r(max) - r(min))
hist pi,bin(50)

* Square the rescaled score. -swilk- reports the Shapiro-Wilk normality test
* for the squared (x) and unsquared (pi) versions side by side.
gen x = pi^2
swilk x pi  /* transformation makes full distribution more normal for OLS */
replace pi = x
drop x
hist pi,bin(50)
*save the data to create "DatawithPI.dta" for visualization


* Graphing discrimination
* -predict- with a stub creates one prediction per gsem outcome (pr2pl1 to
* pr2pl6), which are then drawn against pi, the level of party
* institutionalization.
* Per the original note, the six curves correspond to localorg, partyhis,
* heirparty, localoffice, localstrength and affiliateorg.
* NOTE(review): no statistic is named, so these are presumably predicted
* outcomes rather than discrimination parameters -- confirm.
predict pr2pl*
twoway (line pr2pl* pi, sort), xlabel(0(0.1)1)


* Within-regime variation of party institutionalization
* Replicating results in Table 2 *
* After -xtsum-, r(sd_b) and r(sd_w) are the between- and within-panel standard
* deviations. TotalV is the sum of their squares and RatioWith is the within
* share of that total.
xtset gwf_caseid year

* TotalV and RatioWith are overwritten for each regime type, so each regime's
* figures are also printed to the log as they are computed.
generate TotalV = .
generate RatioWith = .
* cg_regime codes as labelled in this file: 3 = party regime,
* 4 = military regime, 5 = personalist regime.
foreach code in 3 4 5 {
	xtsum pi if cg_regime==`code'
	replace TotalV = r(sd_b)^2 + r(sd_w)^2
	replace RatioWith = r(sd_w)^2/TotalV
	* r() from -xtsum- is still intact here: -replace- does not touch it.
	local total = r(sd_b)^2 + r(sd_w)^2
	local share = r(sd_w)^2/`total'
	display as text "cg_regime==`code': total variance = " as result `total' as text ", within share = " as result `share'
}


* Within-group variation of the items that construct pi
* Replicating results in Appendix A Table A2 *
* This table only needs the panel identifiers, the six items and pi, but the
* rest of the do-file still uses lagcorruption, mean5, cowcode, gwf_leaderid
* and the control variables. The trimmed data are therefore used inside
* preserve/restore rather than dropping those variables for good.
preserve
keep gwf_caseid year $bvars $cvars pi
xtset gwf_caseid year

* TotalV = between variance + within variance; RatioWith = within share.
* Both are overwritten for each variable, so the figures are printed as well.
generate TotalV = .
generate RatioWith = .
foreach v in heirparty partyhistory partylocal v2palocoff v2paactcom v2pasoctie pi {
	xtsum `v'
	replace TotalV = r(sd_b)^2 + r(sd_w)^2
	replace RatioWith = r(sd_w)^2/TotalV
	* r() from -xtsum- is still intact here: -replace- does not touch it.
	local total = r(sd_b)^2 + r(sd_w)^2
	local share = r(sd_w)^2/`total'
	display as text "`v': total variance = " as result `total' as text ", within share = " as result `share'
}
restore


***********************
*     Set global      *
***********************
* Centre lagged corruption on its mean over all observations in memory.
summarize lagcorruption, meanonly
generate corruptionct = lagcorruption - r(mean)

* First, second and third lags of mean5 within cowcode. Declaring the panel
* once is enough for all three lags.
quietly tsset cowcode year
forvalues k = 1/3 {
	quietly generate l`k'mean5 = L`k'.mean5
}


* Building blocks of the regression specifications used below:
*   x1, x2 : control sets          ldv : lagged dependent variables
*   y      : dependent variable    d   : centred corruption
*   m      : party institutionalization (moderator)
global x1 "leadermil ld time"
global x2 "lagregionprotest election legcomp gr"

global ldv "l1mean5 l2mean5 l3mean5"
global y "mean5"
global d "corruptionct"
global m "pi"


*****************************************************
* Check that lag DVs correct for serial correlation *
*****************************************************
* Four regressions of mean5 on corruptionct, adding one more lag of the
* dependent variable each time (none, then l1, then l1-l2, then l1-l3).
* -abar- is run after each one with up to three lags.
local lagged
forvalues k = 0/3 {
	if `k' > 0 {
		local lagged `lagged' l`k'mean5
	}
	quietly regress mean5 corruptionct `lagged'
	abar, lags(3)
}


***********************************
*      Main regression results    *
* Replicating results in Table 3  *
***********************************

* Model with covariates: additive first, then with the corruption x party
* institutionalization interaction. Standard errors clustered on leader.

regress $y $ldv $d $m $x1 $x2, vce(cluster gwf_leaderid)

regress $y $ldv C.$d##C.$m $x1 $x2, vce(cluster gwf_leaderid)


* Effect of corruption at selected values of pi (0, 1 and 0.22).
lincom corruptionct + c.corruptionct#c.pi*0
lincom corruptionct + c.corruptionct#c.pi*1

lincom corruptionct + c.corruptionct#c.pi*0.22
* Marginal effects saved as "marginal.csv"


* Baseline specification *
* Each model is restricted to the estimation sample of the model run just
* before it, so the baseline uses the observations of the covariate model.
regress $y $ldv $d $m ld time if e(sample), vce(cluster gwf_leaderid)
regress $y $ldv C.$d##C.$m ld time if e(sample), vce(cluster gwf_leaderid)


* Effect of corruption at selected values of pi (0, 1 and 0.44).
lincom corruptionct + c.corruptionct#c.pi*0
lincom corruptionct + c.corruptionct#c.pi*1

lincom corruptionct + c.corruptionct#c.pi*0.44




** Replicating results in Table A4
* Descriptives on the estimation sample of the last baseline model.
summarize $y $d $m $x1 $x2 if e(sample)





****************************************************************
** Missing data in VDEM measure of party institutionalization **
**                Replicating results in Table A3             **
****************************************************************
* e(sample) is the estimation sample of the most recent estimation command.
* Cases in that sample for which v2xps_party is missing:
tabulate gwf_casename if e(sample) & v2xps_party==.
* missp = 1 when v2xps_party is missing, 0 otherwise; defined only inside the
* estimation sample.
generate missp = (v2xps_party==.) if e(sample)
tabulate missp
* Mean comparisons between missing and non-missing observations.
ttest mean5 if e(sample), by(missp) /* systematically lower protest for missing obs */
ttest pi if e(sample), by(missp)  /* systematically lower party institutionalization for missing obs */


********************************
* Additional control variables *
*   Replicating Appendix B-1   *
********************************
* Each regression adds ONE extra control to the baseline interaction model.
* The baseline estimation sample is frozen before the loop. Writing
* "if e(sample)==1" inside the loop would refer to the previous iteration's
* regression, so the sample would shrink cumulatively with the missing values
* of every control tried so far and the results would depend on loop order.
* NOTE(review): estimates can differ from those of the chained-sample version.
tempvar base
generate byte `base' = e(sample)

local controls "loggdp logoil lt lpop wdipopurb ythbul4 G_age nmc_logmilper paramil_pers repress inherit supportparty v2x_ex_party v2xnp_client v2xps_party"
foreach v of local controls {
	reg $y $ldv C.$d##C.$m ld time `v' if `base', cluster(gwf_leaderid)
	di "`v'"
}

* Coefficients of Corruption and Corruption*PI are saved as "DVcoruption.csv" and "DVinteraction.csv"

****************************************
* model unit effects in different ways *
*       Replicating Appendix B-2       *
****************************************
* Panel unit is the leader from here on.
xtset gwf_leaderid year

*Random effects with Lag DV
xtreg $y $ldv C.$d##C.$m $x1 $x2, re vce(cluster gwf_leaderid)

* Two-way FE, HAC errors, without Lag DV *
* Restricted to the estimation sample of the random-effects model above.
* NOTE(review): no unit or year fixed-effect terms appear in this command --
* confirm how the "two-way FE" in the heading is meant to be implemented.
ivreg2 $y C.$d##C.$m ld leadermil $x2 if e(sample), robust bw(3)

* Results saved as "uniteffect.csv"


* Full interaction specification shared by every model in Appendix B-3 to B-5.
local spec $y $ldv C.$d##C.$m $x1 $x2

*****************************
* Replicating Appendix B-3  *
*****************************

* Just inherited parties: no newly created parties and no absent parties *
regress `spec' if inherit==1, vce(cluster gwf_leaderid)

* Just when there is a support party *
regress `spec' if supportparty==1, vce(cluster gwf_leaderid)
* Results saved as "supportparty.csv"


*****************************
* Replicating Appendix B-4  *
*****************************
* Check for post-1970 when VParty starts coding *
regress `spec' if year>=1970, vce(cluster gwf_leaderid)
* Add coldwar dummy variable*
regress `spec' coldwar, vce(cluster gwf_leaderid)
* Results saved as "time.csv"



*****************************
* Replicating Appendix B-5  *
*****************************
*** Dropping variables
* NOTE(review): as written this is the full specification with nothing
* dropped; presumably variables are removed by hand for each run -- confirm.
regress `spec', vce(cluster gwf_leaderid)



*****************************
* Replicating Appendix C  *
*****************************
*make raw data graph
* e(sample) is the estimation sample of the most recent estimation command.
* The 33rd and 66th centiles of pi are reported in one call.
centile pi if e(sample)==1, centile(33 66)


* Write the plotting data to a spreadsheet. -replace- lets the do-file be run
* again after regression.xls already exists.
export excel mean5 corruptionct pi ld time if e(sample)==1 using "/Users/mpang/Documents/RESEARCH/PartyInstitution/Replication file/regression.xls", firstrow(variables) replace
* regression.csv is the same as regression.xls

